##############################################################################
#
# PamlJob class:
# a class containing the logic for writing codeml and baseml *.ctl files,
# and logic to parse and store the results from codeml and baseml.
#
#
# Author: Victor Hanson-Smith
# Contact: victorhs@cs.uoregon.edu
#
###############################################################################
import os, re, time

class PamlJob:
    #
    # Base class for the codeml and baseml jobs defined in this file.
    # It holds the job settings and checks for the files a finished
    # PAML run leaves in self.executionDirectory.
    #
    def __init__(self):
        # Which tree this job is for, and where its inputs and outputs live.
        self.treeNumber = None
        self.treePath = None
        self.alignmentPath = None
        self.executionDirectory = None
        self.controlPath = None
        # Options that the subclasses consult when writing a control file.
        self.estimate_branch_lengths = False
        self.among_site_rate_variation = False
        self.fix_asrv = False
        self.alpha = 1.0
        self.ncat_gamma = 4
        self.cleandata = 0

    #
    # Call this method once the job's startJob method has finished.
    # It checks that the files named 'rst' and 'out.paml' exist inside
    # self.executionDirectory.
    #
    # Returns True when both files exist.
    # Otherwise returns an error message (i.e. a string) describing the
    # first file that is missing.
    #
    def verifyResults(self):
        for expected_name in ("rst", "out.paml"):
            expected_path = self.executionDirectory + "/" + expected_name
            if not os.path.exists(expected_path):
                return ("The PAML job on tree " + str(self.treeNumber)
                        + " did not produce a file named '" + expected_name
                        + "'.  I was expecting to find this file at "
                        + expected_path)

        return True


class CodemlJob(PamlJob):
    #
    # A PamlJob that writes codeml.ctl into self.executionDirectory and then
    # runs the "codeml" command from inside that directory.
    #
    def __init__(self):
        PamlJob.__init__(self)
        # NOTE(review): nothing in this class reads self.modelPath; the control
        # file always names "model.dat" as the aaRatefile.  Presumably the
        # caller places the model file under that name in the execution
        # directory -- confirm against the caller.
        self.modelPath = None

    #
    # Writes the control file, then runs codeml.
    #
    # This method assumes the following variables are set:
    # self.executionDirectory, self.alignmentPath and self.treePath.
    # (self.controlPath is assigned by self.writeControlFile.)
    #
    # The working directory is switched to self.executionDirectory while
    # codeml runs and is restored afterwards, even if running the command
    # raises.  The exit status of codeml is not inspected here.
    #
    def startJob(self):
        self.writeControlFile()
        previous_directory = os.getcwd()
        os.chdir(self.executionDirectory)
        try:
            os.system("codeml")
        finally:
            os.chdir(previous_directory)

    #
    # This is a helper method for self.startJob
    # This method writes codeml.ctl into self.executionDirectory and stores
    # the path of that file in self.controlPath.
    #
    # The whole content is assembled before the file is opened, so a missing
    # setting raises without leaving a half-written control file behind, and
    # the file handle is always closed.
    #
    def writeControlFile(self):
        self.controlPath = self.executionDirectory + "/codeml.ctl"
        lines = self._controlFileLines()
        with open(self.controlPath, "w") as f:
            f.write("\n".join(lines) + "\n")

    #
    # This is a helper method for self.writeControlFile
    # Returns the lines of codeml.ctl (without line endings) as a list of
    # strings, built from the settings stored on this job.
    #
    def _controlFileLines(self):
        lines = [
            "*",
            "* This file was auto-generated by Lazarus. " + str(time.localtime()),
            "*",
            "* For more information about the parameters in this file,",
            "* please consult the official PAML documenation.",
            "*",
            # codeml is started from inside self.executionDirectory (see
            # self.startJob), so only the file names are written, not the
            # full paths.
            "seqfile = " + os.path.basename(self.alignmentPath),
            "treefile = " + os.path.basename(self.treePath),
            "outfile = out.paml",
            "noisy = 3",
            "verbose = 9",
            "runmode = 0",
            "seqtype = 2",
            "aaRatefile = model.dat",
            "model = 3",
            "fix_alpha = 1" if self.fix_asrv else "fix_alpha = 0",
        ]

        if self.among_site_rate_variation:
            lines.append("alpha = " + str(self.alpha))
            # Use the configured number of categories (default 4) instead of
            # a hard-coded value.
            lines.append("ncatG = " + str(self.ncat_gamma))
        else:
            lines.append("alpha = 0")

        lines.append("RateAncestor = 2")
        lines.append("Small_Diff = 1.0e-6")
        # cleandata controls removal of sites with "X" or "?" or other ambiguity
        lines.append("cleandata = " + str(self.cleandata))
        lines.append("method = 1")

        if self.estimate_branch_lengths:
            # use the branch lengths as a starting value for the ML search
            lines.append("fix_blength = 1")
        else:
            # fix the branch lengths at their values in the tree file.
            lines.append("fix_blength = 2")

        return lines
        
class BasemlJob(PamlJob):
    #
    # A PamlJob that writes baseml.ctl into self.executionDirectory and then
    # runs the "baseml" command from inside that directory.
    #

    # Maps each model name accepted by get_number_for_model to the integer
    # written after "model = " in baseml.ctl.
    _MODEL_NUMBERS = {
        "JC69": 0,
        "K80": 1,
        "F81": 2,
        "F84": 3,
        "HKY85": 4,
        "T92": 5,
        "TN93": 6,
        "REV": 7,
        "UNREST": 8,
        "REVu": 9,
        "UNRESTu": 10,
    }

    def __init__(self):
        PamlJob.__init__(self)
        self.modelName = None

    #
    # Writes the control file, then runs baseml.
    #
    # This method assumes the following variables are set:
    # self.executionDirectory, self.alignmentPath, self.treePath and
    # self.modelName.  (self.controlPath is assigned by self.writeControlFile.)
    #
    # The working directory is switched to self.executionDirectory while
    # baseml runs and is restored afterwards, even if running the command
    # raises.  The exit status of baseml is not inspected here.
    #
    def startJob(self):
        self.writeControlFile()
        previous_directory = os.getcwd()
        os.chdir(self.executionDirectory)
        try:
            os.system("baseml")
        finally:
            os.chdir(previous_directory)

    #
    # Returns the integer that baseml.ctl uses for the model named modelName,
    # or None if modelName is not one of the known names.
    #
    def get_number_for_model(self, modelName):
        return self._MODEL_NUMBERS.get(modelName)

    #
    # This is a helper method for self.startJob
    # This method writes baseml.ctl into self.executionDirectory and stores
    # the path of that file in self.controlPath.
    #
    # Raises ValueError if self.modelName is not a known model name.
    #
    # The whole content is assembled before the file is opened, so a bad or
    # missing setting raises without leaving a half-written control file
    # behind, and the file handle is always closed.
    #
    def writeControlFile(self):
        self.controlPath = self.executionDirectory + "/baseml.ctl"
        lines = self._controlFileLines()
        with open(self.controlPath, "w") as f:
            f.write("\n".join(lines) + "\n")

    #
    # This is a helper method for self.writeControlFile
    # Returns the lines of baseml.ctl (without line endings) as a list of
    # strings, built from the settings stored on this job.
    #
    def _controlFileLines(self):
        modelnumber = self.get_number_for_model(self.modelName)
        if modelnumber is None:
            # Refuse to write "model = None" into the control file.
            known_names = sorted(self._MODEL_NUMBERS, key=self._MODEL_NUMBERS.get)
            raise ValueError("Unknown baseml model name " + repr(self.modelName)
                             + "; expected one of: " + ", ".join(known_names))

        lines = [
            "*",
            "* This file was auto-generated by Lazarus. " + str(time.localtime()),
            "*",
            "* For more information about the parameters in this file,",
            "* please consult the official PAML documenation.",
            "*",
            # baseml is started from inside self.executionDirectory (see
            # self.startJob), so only the file names are written, not the
            # full paths.
            "seqfile = " + os.path.basename(self.alignmentPath),
            "treefile = " + os.path.basename(self.treePath),
            "outfile = out.paml",
            "noisy = 3",
            "verbose = 9",
            "runmode = 0",
            "model = " + str(modelnumber),
            "fix_alpha = 1" if self.fix_asrv else "fix_alpha = 0",
        ]

        if self.among_site_rate_variation:
            # Use the configured number of categories (default 4) instead of
            # a hard-coded value.
            lines.append("ncatG = " + str(self.ncat_gamma))
        # NOTE(review): alpha is written even when among_site_rate_variation
        # is off, whereas CodemlJob writes "alpha = 0" in that case.  Left
        # unchanged here; confirm which behavior is intended.
        lines.append("alpha = " + str(self.alpha))

        lines.append("RateAncestor = 2")
        lines.append("Small_Diff = 1.0e-6")
        # cleandata controls removal of sites with "X" or "?" or other ambiguity
        lines.append("cleandata = " + str(self.cleandata))
        lines.append("method = 1")

        if self.estimate_branch_lengths:
            # use the branch lengths as a starting value for the ML search
            lines.append("fix_blength = 1")
        else:
            # fix the branch lengths at their values in the tree file.
            lines.append("fix_blength = 2")

        return lines